quick comparison on deposited and reprocessed bigwig files
corr_df <- data.frame(data.frame(cov.combined)[,c(9,6,7,8)],data.frame(cov.geo)[,6:8],data.frame(cov.mocks)[,6:7],data.frame(cov.pds)[,6:7],data.frame(cov.pdc)[,6:7])
res <- cor(corr_df[,2:13])
corrplot(res, type = "upper", order = "hclust",tl.col = "black", tl.srt = 45,addCoef.col = 'black',tl.pos = 'd', cl.pos = 'n', col.lim=c(0.93, 1),is.corr = F, method = 'color')

p1 <- plot_bw_profile(c(mocks_bigwigs[1],pds_bigwigs[1],pdc_bigwigs[1]),loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p2 <- plot_bw_profile(c(mocks_bigwigs[2],pds_bigwigs[2],pdc_bigwigs[2]),loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
ggarrange(p1,p2, ncol=2)

p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../data/peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../data/peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
ggarrange(p1,p2, ncol=2)

p1 <- plot_bw_loci_scatter(mocks_bigwigs[1],mocks_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(pds_bigwigs[1],pds_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(pdc_bigwigs[1],pdc_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)

p1 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(combined_bigwigs[2],combined_bigwigs[3], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[3], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)

results <- data.frame(
as.data.frame(cov.mocks),
as.data.frame(cov.pds)[6:7],
as.data.frame(cov.pdc)[6:7],
as.data.frame(cov.G4)[6],
raw.lfc.pds_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
raw.lfc.pds_2 = log2(cov.pds$PDS_2 / cov.mocks$mock_2),
raw.lfc.pdc_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2 / cov.mocks$mock_2),
raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
)
cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL
de_pds <- bw_granges_diff_analysis(cov.mocks, cov.pds, "Mock", "PDS")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
de_pdc <- bw_granges_diff_analysis(cov.mocks, cov.pdc, "Mock", "PhenDC3")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05
results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05
results$class <- gsub(" .+", "", results$name)
results$pro <- gsub(".+ ", "", results$name)
results$pro <- factor(results$pro, levels = c("Pro", "noPro"))
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)
results$deseq.sigup.pds <- results$deseq.sig.pds &
results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc &
results$deseq.lfc.pdc > 0
write.table(results, glue("{result_folder}foldchange_results.txt"))
table(results$name)
CTCF_and_G4 noPro CTCF_and_G4 Pro CTCF_not_G4 noPro CTCF_not_G4 Pro
998 1314 43304 6013
#results <- read.table("foldchange_results.txt")
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
p <- ggviolin(
results,
x = "class",
y = "mean.mock",
fill = "class",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)

ggsave(glue("{plot_folder}Violin_CTCF_classes.pdf"), last_plot())
Saving 3 x 3 in image
p <- ggviolin(
results,
x = "class",
y = "mean.mock",
fill = "pro",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)

ggsave(glue("{plot_folder}Violin_CTCF_classes_pro.pdf"),
last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"mock_1",
"mock_2",
"PDS_1",
"PDS_2",
"PhenDC3_1",
"PhenDC3_2"
)
))
Using class as id variables
ggboxplot(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal
) + coord_cartesian(ylim = c(0, 10))

ggsave(glue("{plot_folder}Boxplot_CTCF_reps.pdf"), last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"mock_1",
"mock_2",
"PDS_1",
"PDS_2",
"PhenDC3_1",
"PhenDC3_2"
)
))
Using class as id variables
mdf_stats_class = compare_means(value ~ class, group.by = "variable", data = mdf)
ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))

ggsave(glue("{plot_folder}Violin_CTCF_reps.pdf"), last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(results, c(
"class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
Using class as id variables
p <- ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-3, 3)) + stat_compare_means(aes(group = class), label.y = 3, size = 2)
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_compare_means()`).

ggsave(
glue("{plot_folder}Violin_CTCF_lfc.pdf"),
last_plot(),
width = 5,
height = 5
)
p <- ggboxplot(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-2, 2))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_boxplot()`).

ggsave(glue("{plot_folder}Boxplot_CTCF_lfc.pdf"), last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(results, c(
"pro", "class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
Using pro, class as id variables
mdf$pro <- factor(mdf$pro, levels = c("Pro", "noPro"))
mdf$x <- as.factor(paste0(mdf$class, " ", mdf$variable))
mdf$x <- factor(mdf$x, levels = levels(mdf$x)[c(2, 1, 4, 3)])
p <- ggviolin(
mdf,
x = "x",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr",
facet.by = "pro"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-2, 2)) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave(glue("{plot_folder}Violin_CTCF_lfc_pro.pdf"), last_plot())
Saving 5 x 6 in image
med <- aggregate(deseq.lfc.pds ~ class,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pds ~ name,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pds ~ class,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pdc ~ name,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pdc <- 2 ^ med$deseq.lfc.pdc
med
deseq_lfc_stats = compare_means(deseq.lfc.pds ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
glue("{stat_output}pds-deseq2_lfc_statistics.tsv"))
deseq_lfc_stats = compare_means(deseq.lfc.pdc ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
glue("{stat_output}phendc-deseq2_lfc_statistics.tsv"))
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"raw.lfc.pds_1",
"raw.lfc.pds_2",
"raw.lfc.pdc_1",
"raw.lfc.pdc_2"
)
))
Using class as id variables
ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-3, 3)) +
stat_compare_means(aes(group = class), label.y = 3.6, size = 2)
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 469 rows containing non-finite outside the scale range
(`stat_compare_means()`).

ggsave(glue("{plot_folder}Violin_CTCF_lfc_individual_reps.pdf"), last_plot())
Saving 5 x 3 in image
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 469 rows containing non-finite outside the scale range
(`stat_compare_means()`).
ggdensity(
results,
x = "raw.lfc.pds",
color = "class",
fill = "class",
palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
Warning: Removed 26 rows containing non-finite outside the scale range (`stat_density()`).

ggdensity(
results,
x = "raw.lfc.pdc",
color = "class",
fill = "class",
palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
Warning: Removed 23 rows containing non-finite outside the scale range (`stat_density()`).

ggscatter(
results,
x = "log2.mock",
y = "log2.pds",
size = 1,shape=20,
alpha = 0.1,
color = "deseq.sig.pds",
palette = mypal[c(5, 2)]
)

ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PDSvsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
ggscatter(
results,
x = "log2.mock",
y = "log2.pdc",
size = 1,shape=20,
alpha = 0.1,
color = "deseq.sig.pdc",
palette = mypal[c(5, 2)]
)

ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PhenDC3vsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
tb <- table(results$class)
deseq.stats <- as.data.frame(t(as.matrix(tb)))
deseq.stats[1,] <- colSums(deseq.stats)
rownames(deseq.stats) <- c("Total")
deseq.stats.percent <- deseq.stats
deseq.stats.percent[1,] <- c(100,100)
tb
CTCF_and_G4 CTCF_not_G4
2312 49317
tb <- table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class)
deseq.stats <- rbind(deseq.stats, PDS.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2202 46826
TRUE 110 2488
tb <- prop.table(table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PDS.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 95.242215 94.954780
TRUE 4.757785 5.045220
table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2202 46826
TRUE 110 2488
mdf <- reshape2::melt(table(
results$deseq.sig.pds & (results$deseq.lfc.pds > 0),
results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()

vl <- list(
sig = grep("TRUE", results$deseq.sigup.pds),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)

tb <- table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class)
deseq.stats <- rbind(deseq.stats, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2237 48550
TRUE 75 767
table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc < 0),
results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2305 49155
TRUE 7 162
tb <- prop.table(table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 96.756055 98.444755
TRUE 3.243945 1.555245
table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2237 48550
TRUE 75 767
mdf <- reshape2::melt(table(
results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),
results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()

results$uid <- seq(1:nrow(results))
vl <- list(
sig = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)

table(results$deseq.sigup.pds, results$deseq.sigup.pdc)
FALSE TRUE
FALSE 48558 470
TRUE 2226 372
mdf <- reshape2::melt(table(results$deseq.sigup.pds, results$deseq.sigup.pdc))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()

tb <- table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
results$class)
deseq.stats <- rbind(deseq.stats, both.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2282 48975
TRUE 30 342
tb <- prop.table(table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, both = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 98.7024221 99.3065272
TRUE 1.2975779 0.6934728
table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2282 48975
TRUE 30 342
mdf <- reshape2::melt(table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()

deseq.stats$class <- rownames(deseq.stats)
mdf <- reshape2::melt(deseq.stats)
Using class as id variables
ggplot(mdf, aes(variable, class, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()

ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
deseq.stats.percent$class <- rownames(deseq.stats.percent)
mdf <- reshape2::melt(deseq.stats.percent[-1,])
Using class as id variables
ggplot(mdf, aes(variable, class, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = round(mdf$value,2))) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
Warning: Use of `mdf$value` is discouraged.
ℹ Use `value` instead.

ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup_percent.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
Warning: Use of `mdf$value` is discouraged.
ℹ Use `value` instead.
vl <- list(
sig_PDS = grep("TRUE", results$deseq.sigup.pds),
sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)

results$uid <- seq(1:nrow(results))
vl <- list(
sig_PDS = grep("TRUE", results$deseq.sigup.pds),
sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class)
)
plot(euler(vl), quantities = T)

results$sig.by.class.pds <- paste0(results$deseq.sigup.pds, "_", results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds &
results$class == "CTCF_and_G4"] <- 1
ggscatter(
results,
x = "log2.mock",
y = "log2.pds",
size = 0.5,
alpha = results$psize,
color = "sig.by.class.pds",
palette = c(mypal[5], mypal[5], mypal[5], mypal[2], mypal[1])
)

ggscatter(
results[!grepl("NA", results$sig.by.class.pds), ],
x = "log2.mock",
y = "deseq.lfc.pds",
size = 0.2,
alpha = "psize",
color = "sig.by.class.pds",
palette = c("#505050", "#505050", "red2", "blue")
)

ggscatterhist(
results[!grepl("NA", results$sig.by.class.pds), ],
x = "log2.mock",
y = "log2.pds",
size = 0.4,
alpha = "psize",
color = "sig.by.class.pds",
margin.params = list(
fill = "sig.by.class.pds",
color = "black",
size = 0.2
),
palette = c("#505050", "#505050", "red2", "blue")
)
Warning: Removed 13 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 10 rows containing non-finite outside the scale range (`stat_density()`).


ggscatter(
results,
x = "raw.lfc.pds",
y = "log2.G4",
size = 0.2,
alpha = 0.1,
color = "deseq.sig.pds",
cor.coef = T,
palette = c("#888888",mypal[2])
)
Warning: Removed 16801 rows containing non-finite outside the scale range (`stat_cor()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDS.pdf"),
plot = rasterize(last_plot()))
Saving 3 x 3 in image
Warning: Removed 16801 rows containing non-finite outside the scale range (`stat_cor()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
ggscatter(
results,
x = "raw.lfc.pdc",
y = "log2.G4",
size = 0.2,
alpha = 0.1,
color = "deseq.sig.pdc",
cor.coef = T,
palette = c("#888888",mypal[2])
)
Warning: Removed 16798 rows containing non-finite outside the scale range (`stat_cor()`).

ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDC.pdf"),
plot = rasterize(last_plot()))
Saving 3 x 3 in image
Warning: Removed 16798 rows containing non-finite outside the scale range (`stat_cor()`).
results$G4.quantile <- dplyr::ntile(results$G4_NT, n = 5)
results$pds.quantile <- dplyr::ntile(results$mean.pds-results$mean.mock, n = 4)
results$pdc.quantile <- dplyr::ntile(results$mean.pdc-results$mean.mock, n = 4)
top25.pds <- makeGRangesFromDataFrame(results[results$pds.quantile==4,1:3],na.rm=T)
bot75.pds <- makeGRangesFromDataFrame(results[results$pds.quantile!=4,1:3],na.rm=T)
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
top25.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile==4,1:3],na.rm=T)
bot75.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile!=4,1:3],na.rm=T)
top25.pdc.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pdc.CTCF_only <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
vl <- list(
CTCF_G4 = top25.pds.CTCF_G4,
CTCF_only = top25.pds.CTCF_only,
G4motif = rtracklayer::import("../data/peaks/G4H_mm10_1.75.bed")
)
bedscout::plot_euler(grlist = vl,names = c("CTCF_G4","CTCF_only","G4motif"))

ggviolin(
results,
x = "pds.quantile",
y = "deseq.lfc.pds",
fill = mypal[3],
add = "median_iqr"
) + coord_cartesian(ylim = c(-1.5, 1.5)) + geom_hline(yintercept = 0, linetype =
"dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave(glue("{plot_folder}Violin_lfcPDS_by_PDSquantile.pdf"),
plot = last_plot())
Saving 3 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
ggviolin(
results,
x = "pdc.quantile",
y = "deseq.lfc.pdc",
fill = mypal[3],
add = "median_iqr"
) + coord_cartesian(ylim = c(-1.5, 1.5)) + geom_hline(yintercept = 0, linetype =
"dotted")

ggsave(glue("{plot_folder}Violin_lfcPDS_by_PhenDC3quantile.pdf"),
plot = last_plot())
Saving 3 x 3 in image
tb <- table(results$pds.quantile, results$class)
tb
CTCF_and_G4 CTCF_not_G4
1 440 12468
2 523 12384
3 544 12363
4 805 12102
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))

ggsave(glue("{plot_folder}Barplot_peakCat_by_PDSquantile.pdf"),
plot = last_plot())
Saving 4 x 3 in image
G4.motifs <- rtracklayer::import('../data/peaks/G4H_mm10_1.75.bed')
G4.motifs$type = rep("G4",length(G4.motifs))
ol <- bedscout::annotate_overlapping_features( makeGRangesFromDataFrame(results[,1:3],na.rm=T),feat_gr = G4.motifs,name_field = "type")$nearby_features
ol[is.na(ol)] <- "none"
tb <- table(results$pds.quantile,Var1=ol)
tb
Var1
G4 none
1 2072 10836
2 1985 10922
3 1879 11028
4 1686 11221
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))

ggsave(glue("{plot_folder}Barplot_G4motif_by_PDSquantile.pdf"),
plot = last_plot())
Saving 4 x 3 in image
table(ol,results$pds.quantile, results$class)
, , = CTCF_and_G4
ol 1 2 3 4
G4 160 183 173 209
none 280 340 371 596
, , = CTCF_not_G4
ol 1 2 3 4
G4 1912 1802 1706 1477
none 10556 10582 10657 10625
tb <- table(results$pdc.quantile, results$class)
tb
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))
ggsave(glue("{plot_folder}Barplot_peakCat_by_PhenDC3quantile.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = top25.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = bot75.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile.pdf"),
plot = last_plot())
top25.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile==4,1:3],na.rm=T)
bot75.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile!=4,1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = top25.pdc, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = bot75.pdc, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PhenDC3quantile.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[1:3],loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[1:3],loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile_byG4overlap.pdf"),
plot = last_plot())
top25.pdc.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pdc.CTCF_only <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PhenDC3quantile_byG4overlap.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[c(2,3)],bg_bwfiles = c(geo_bigwigs[1],geo_bigwigs[1]), loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[c(2,3)],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[c(2,3)],bg_bwfiles = c(geo_bigwigs[1],geo_bigwigs[1]), loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[c(2,3)],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PDSquantile_byG4.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PhenDC3quantile_byG4.pdf"),
plot = last_plot())
#geo_CnR_bigwigs <- list.files("../mendeley/additional/","_G4.bw",full.names = T)
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = bot75.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atCTCFpeaks_percentiles.pdf"),
plot = last_plot())
closG4 <- bedscout::annotate_nearby_features(G4.motifs,feat_gr = makeGRangesFromDataFrame(results[,c(1,2,3,8)],keep.extra.columns = T),name_field = "name",distance_cutoff = 1000)
closG4 <- closG4[!is.na(closG4$nearby_features),]
p3 <- plot_bw_profile(bwfiles = c('../mendeley/additional/G4.mm10.plus.bw','../mendeley/additional/G4.mm10.minus.bw'), loci = closG4, mode = "center",verbose = F, colors = c("#FFAAFF","#DDBBFF"),show_error = T)
p4 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = closG4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p5 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = closG4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p3,p4,p5,ncol=3)
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = bot75.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_PhenDC3_G4atCTCFpeaks_PDSpercentiles.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atCTCFpeaks_byG4.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = bot75.pds, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
p4 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/G4H_mm10_1.75.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p5 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/G4H_mm10_1.75.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p4,p4,p5, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atG4motifs.pdf"),
plot = last_plot())
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
df1 <- data.frame(set="top25_CTCF_G4", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=top25.pds.CTCF_G4, labels=c("Mock","PDS")))[,6:7])
df2 <- data.frame(set="top25_CTCF_only", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=top25.pds.CTCF_only, labels=c("Mock","PDS")))[,6:7])
df3 <- data.frame(set="bottom75", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=bot75.pds, labels=c("Mock","PDS")))[,6:7])
df4 <- data.frame(set="Random", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2], loci="../data/peaks/Control_peaks_shuffled.bed", labels=c("Mock","PDS")))[,6:7])
df <- rbind(df1,df2,df3,df4)
df$lfc.pds <- log2(df$PDS/df$Mock)
df$diff.pds <- df$PDS - df$Mock
mdf <- melt(df)
mdf$lg2 <- log2(mdf$value)
ggviolin(mdf[mdf$variable %in% c("Mock","PDS"),], "set","lg2",fill="variable",palette = mypal, add="median_iqr")
ggviolin(mdf[mdf$variable %in% "lfc.pds",], "set","value",fill="variable",palette = mypal, add="median_iqr") + geom_hline(yintercept = 0)
ggviolin(mdf[mdf$variable %in% "diff.pds",], "set","value",fill="variable",palette = mypal, add="median_iqr") + coord_cartesian(ylim = c(-20,20)) + geom_hline(yintercept = 0)
df1 <- data.frame(set="top25_CTCF_G4", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=top25.pdc.CTCF_G4, labels=c("Mock","PDC")))[,6:7])
df2 <- data.frame(set="top25_CTCF_only", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=top25.pdc.CTCF_only, labels=c("Mock","PDC")))[,6:7])
df3 <- data.frame(set="bottom75", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=bot75.pdc, labels=c("Mock","PDC")))[,6:7])
df4 <- data.frame(set="Random", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4], loci="../data/peaks/Control_peaks_shuffled.bed", labels=c("Mock","PDC")))[,6:7])
df <- rbind(df1,df2,df3,df4)
df$lfc.pdc <- log2(df$PDC/df$Mock)
df$diff.pdc <- df$PDC - df$Mock
mdf <- melt(df)
mdf$lg2 <- log2(mdf$value)
ggviolin(mdf[mdf$variable %in% c("Mock","PDC"),], "set","lg2",fill="variable",palette = mypal, add="median_iqr")
ggviolin(mdf[mdf$variable %in% "lfc.pdc",], "set","value",fill="variable",palette = mypal, add="median_iqr") + geom_hline(yintercept = 0)
ggviolin(mdf[mdf$variable %in% "diff.pdc",], "set","value",fill="variable",palette = mypal, add="median_iqr") + coord_cartesian(ylim = c(-20,20)) + geom_hline(yintercept = 0)
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
p1 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ord.top25 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_top25_PDSquantile.pdf"),
plot = last_plot())
ord.top25 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[3],loci = top25.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[3],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_top25_PhenDC3_on_PDS_quantile.pdf"),
plot = last_plot())
ord.bot75 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_bot75_PDSquantile.pdf"),
plot = last_plot())
ord.bot75 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[3],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_bot75_PhenDC3_on_PDS_quantile.pdf"),
plot = last_plot())
G4_CnR <- bw_loci(
c(
"../mendeley/FigureS2/G4_CUT-Run_Mock-PDS_CPM_Wulfridge.bw",
"../mendeley/FigureS2/G4_CUT-Run_PDS_CPM_Wulfridge.bw"
),
loci = makeGRangesFromDataFrame(results, na.rm = T),
labels = c("Mock", "PDS")
)
results$G4_CnR_delta <- G4_CnR$PDS - G4_CnR$Mock
results$G4_CnR_lfc <- log2(G4_CnR$PDS / G4_CnR$Mock)
results$G4_CnR_mock <- G4_CnR$Mock
ggviolin(
results,
x = "pds.quantile",
y = "G4_CnR_delta",
fill = mypal[2],
add = "median_iqr"
) + coord_cartesian(ylim = c(-10, 15)) + geom_hline(yintercept = 1.55, linetype =
"dotted")
ggsave(glue("{plot_folder}Violin_G4_CnR_delta_by_PDSquantile.pdf"),
plot = last_plot())
ggviolin(
results,
x = "pds.quantile",
y = "G4_CnR_lfc",
fill = mypal[2],
add = "median_iqr"
) + coord_cartesian(ylim = c(-10, 15)) + geom_hline(yintercept = 0, linetype =
"dotted")
ggsave(glue("{plot_folder}Violin_G4_CnR_delta_by_PDSquantile.pdf"),
plot = last_plot())
compare_means(G4_CnR_lfc ~ pds.quantile, results)
results$pds.quartile.top25 <- results$pds.quantile == 4
mean(na.omit(results$G4_CnR_delta))
mean(results$G4_CnR_delta[results$pds.quartile.top25])
mean(results$G4_CnR_delta[!results$pds.quartile.top25])
compare_means(G4_CnR_delta ~ pds.quartile.top25, results)
export.bed(top25.pds,"../data/peaks/CTCF_peaks_top25_pds_up.bed")
export.bed(bot75.pds,"../data/peaks/CTCF_peaks_bot75_pds_up.bed")
top25.pds <- rtracklayer::import("../data/peaks/CTCF_peaks_top25_pds_up.bed")
bot75.pds <- rtracklayer::import("../data/peaks/CTCF_peaks_bot75_pds_up.bed")
---
title: "R Notebook"
output: html_notebook
editor_options: 
  chunk_output_type: inline
---


```{r}
library("tidyverse")
library("data.table")
library("rtracklayer")
library("ggrastr")
library("glue")
library("DESeq2")
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")
library("corrplot")
library("bedscout")

# export 
result_folder = "../results/"
plot_folder = "../results/plots/"
stat_output = "../results/stats/"

combined_bigwigs <- list.files("../data/mendeley/Figure2", pattern = "*_CTCF*.bw",
                  full.names = TRUE)
geo_bigwigs <- list.files("../data/mendeley/Figure2",pattern ="*_CTCF.bw",
                  full.names = TRUE)
geo_CnR_bigwigs <- list.files("../data/mendeley/FigureS2", pattern = "*Wulfridge.bw",
                  full.names = TRUE)
```

```{r}
# colors
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal <-c("#00619D","#A82A34","orange","seagreen","#505050")
mypal2 <-c("#00619D","#7FB0CE","#A82A34","#D39499","orange","#FFD55A","seagreen","#7DBA9C","#505050")
```

```{r}
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
                                     granges_c2,
                                     label_c1,
                                     label_c2,
                                     estimate_size_factors = FALSE,
                                     as_granges = FALSE) {
  # Bind first, get numbers after
  names_values <- NULL
  fields <- names(mcols(granges_c1))
  
  if ("name" %in% fields) {
    names_values <- mcols(granges_c1)[["name"]]
    granges_c1 <- granges_c1[, fields[fields != "name"]]
  }
  
  fields <- names(mcols(granges_c2))
  if ("name" %in% fields) {
    granges_c2 <- granges_c2[, fields[fields != "name"]]
  }
  
  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
  
  if (!is.null(names_values)) {
    rownames(cts_df) <- names_values
  }
  
  # Needs to drop non-complete cases and match rows
  complete <- complete.cases(cts_df)
  cts_df <- cts_df[complete, ]
  
  values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
  cts <- get_nreads_columns(values_df)
  
  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
  
  
  coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
  print(coldata)
  
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = cts,
    colData = coldata,
    design = ~ condition,
    rowRanges = granges_c1[complete, ]
  )
  
  
  if (estimate_size_factors == TRUE) {
    dds <- DESeq2::estimateSizeFactors(dds)
  }
  else {
    # Since files are scaled, we do not want to estimate size factors
    sizeFactors(dds) <- c(rep(1, ncol(cts)))
  }
  
  dds <- DESeq2::estimateDispersions(dds)
  dds <- DESeq2::nbinomWaldTest(dds)
  
  if (as_granges) {
    result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
    if (!is.null(names_values)) {
      result$name <- names_values[complete]
    }
    
  }
  else {
    # result <- results(dds, format="DataFrame")
    result <- dds
  }
  
  result
}

get_nreads_columns <- function(df, length_factor = 100) {
  # Convert mean coverages to round integer read numbers
  cts <- as.matrix(df)
  cts <- as.matrix(cts[complete.cases(cts), ])
  cts <- round(cts * length_factor)
  cts
}
```

```{r fig.width=5, fig.height=5}
ctcf.raw <- import("../data/peaks/CTCF_mES_peaks.narrowPeak")
G4.raw <- import("../data/peaks/G4_WT_peaks.narrowPeak")
ctcf.raw$class <- "CTCF_not_G4"
G4.raw$class <- "G4"
ctcf.raw$pro <- "NA"
G4.raw$pro <- "NA"
tr.pro <- rtracklayer::import("../data/peaks/regions/promoters_geneSymbol.mm10.bed")
tr.blacklist <- rtracklayer::import("../data/peaks/regions/mm10-blacklist.v2.bed")

ctcf.anno <- bedscout::annotate_overlapping_features(ctcf.raw,tr.blacklist,name_field = "name")
ctcf.anno <- ctcf.anno[is.na(ctcf.anno$nearby_features)]

ctcf.anno <- bedscout::annotate_overlapping_features(ctcf.anno,tr.pro,name_field = "name")
ctcf.anno$pro[is.na(ctcf.anno$nearby_features)] <- "noPro"
ctcf.anno$pro[!is.na(ctcf.anno$nearby_features)] <- "Pro"

ctcf.anno <- bedscout::annotate_overlapping_features(ctcf.anno,G4.raw,name_field = "class")
ctcf.anno$class[!is.na(ctcf.anno$nearby_features)] <- "CTCF_and_G4"
ctcf.anno$name <- paste0(ctcf.anno$class," ",ctcf.anno$pro)



ctcf.anno <- sortSeqlevels(ctcf.anno)
ctcf.anno <- sort(ctcf.anno)
peaks_bed <- "../data/peaks/CTCF_G4_in_6_categories.bed"
export.bed(ctcf.anno, peaks_bed)
```

```{r}
ctcf.and.G4.pro <- import("../data/peaks/G4_CTCF_with_promoters_sorted.bed")
ctcf.not.G4.pro <- import("../data/peaks/CTCF-only_with_promoters_sorted.bed")
ctcf.and.G4.npr <- import("../data/peaks/G4_CTCF_without_promoters_sorted.bed")
ctcf.not.G4.npr <- import("../data/peaks/CTCF-only_without_promoters_sorted.bed")

ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"

ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf.merged <- c(ctcf.and.G4,ctcf.not.G4)
ctcf.merged  <- sortSeqlevels(ctcf.merged )
ctcf.merged  <- sort(ctcf.merged)
names(ctcf.merged ) <- paste0(ctcf.merged $class," ",ctcf.merged $pro)
export.bed(ctcf.merged , "../data/peaks/CTCF_G4_in_6_categories_merged.bed")
```

```{r}
ctcf <- ctcf.anno
table(ctcf.merged$class,ctcf.merged$pro)
table(ctcf.anno$class,ctcf.anno$pro)
```

```{r}
ggdensity(as.data.frame(ctcf.anno,row.names = NULL),x="width",y = "density",color = "class", fill="class", alpha=0.1, palette=mypal) + scale_x_continuous(limits = c(100,800))
ggdensity(as.data.frame(ctcf.merged,row.names = NULL),x="width",y = "density",color = "class", fill="class", alpha=0.1, palette=mypal) + scale_x_continuous(limits = c(100,800))
```

```{r}
df <- rbind(data.frame(ctcf.anno[,0:0],peakwidth="original", row.names = NULL), data.frame(ctcf.merged[,0:0],peakwidth="merged", row.names = NULL))
ggdensity(df,x="width",y = "density",color = "peakwidth", fill="peakwidth", alpha=0.1, palette=mypal) + scale_x_continuous(limits = c(100,800)) + ggtitle("Peakwidth all peaks")
```

```{r}
df <- rbind(data.frame(ctcf.anno[ctcf.anno$class=="CTCF_and_G4",0:0],peakwidth="original", row.names = NULL), data.frame(ctcf.merged[ctcf.merged$class=="CTCF_and_G4",0:0],peakwidth="merged", row.names = NULL))
ggdensity(df,x="width",y = "density",color = "peakwidth", fill="peakwidth", alpha=0.1, palette=mypal) + scale_x_continuous(limits = c(100,800)) + ggtitle("Peakwidth CTCF_and_G4 peaks")
```

### calculate coverage over peaks
```{r}
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../mendeley/FigureS3/GSM7116277_E14_Mock_CTCF_Rep1.snakemake.bw",
          "../mendeley/FigureS3/GSM7116278_E14_Mock_CTCF_Rep2.snakemake.bw")

# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../mendeley/FigureS3/GSM7116279_E14_PDS_CTCF_Rep1.snakemake.bw",
                "../mendeley/FigureS3/GSM7116280_E14_PDS_CTCF_Rep2.snakemake.bw")

pdc_bigwigs = c("../mendeley/FigureS3/GSM7116281_E14_PhenDC3_CTCF_Rep1.snakemake.bw",
                "../mendeley/FigureS3/GSM7116282_E14_PhenDC3_CTCF_Rep2.snakemake.bw")

G4_bigwigs = c("../mendeley/Figure1/G4_CUT-Tag_thisStudy.bw")

cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
cov.combined <- bw_loci(combined_bigwigs,peaks_bed,labels=c("mock","PDS","PhenDC3"))
cov.geo <- bw_loci(geo_bigwigs,peaks_bed,labels=c("GEO_mock","GEO_PDS","GEO_PhenDC3"))
```

### quick comparison on deposited and reprocessed bigwig files
```{r fig.width=10, fig.height=10}
corr_df <- data.frame(data.frame(cov.combined)[,c(9,6,7,8)],data.frame(cov.geo)[,6:8],data.frame(cov.mocks)[,6:7],data.frame(cov.pds)[,6:7],data.frame(cov.pdc)[,6:7])
res <- cor(corr_df[,2:13])
corrplot(res, type = "upper", order = "hclust",tl.col = "black", tl.srt = 45,addCoef.col = 'black',tl.pos = 'd',    cl.pos = 'n', col.lim=c(0.93, 1),is.corr = F, method = 'color')
```

```{r fig.height=3, fig.width=9}
p1 <- plot_bw_profile(c(mocks_bigwigs[1],pds_bigwigs[1],pdc_bigwigs[1]),loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p2 <- plot_bw_profile(c(mocks_bigwigs[2],pds_bigwigs[2],pdc_bigwigs[2]),loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p3 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
ggarrange(p1,p2,p3, ncol=3)
```

```{r fig.height=3, fig.width=6}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
ggarrange(p1,p2, ncol=2)
```
```{r fig.height=3, fig.width=6}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../data/peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../data/peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
ggarrange(p1,p2, ncol=2)
```
```{r fig.height=3, fig.width=9}
p1 <- plot_bw_loci_scatter(mocks_bigwigs[1],mocks_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(pds_bigwigs[1],pds_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(pdc_bigwigs[1],pdc_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)
```

```{r fig.height=3, fig.width=9}
p1 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[2], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(combined_bigwigs[2],combined_bigwigs[3], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[3], loci = "../data/peaks/G4_CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)
```


```{r}
results <- data.frame(
  as.data.frame(cov.mocks),
  as.data.frame(cov.pds)[6:7],
  as.data.frame(cov.pdc)[6:7],
  as.data.frame(cov.G4)[6],
  raw.lfc.pds_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
  raw.lfc.pds_2 = log2(cov.pds$PDS_2 / cov.mocks$mock_2),
  raw.lfc.pdc_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
  raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2 / cov.mocks$mock_2),
  raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
  raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
  mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
  mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
  mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
)

cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL

de_pds <- bw_granges_diff_analysis(cov.mocks, cov.pds, "Mock", "PDS")
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")

de_pdc <- bw_granges_diff_analysis(cov.mocks, cov.pdc, "Mock", "PhenDC3")
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")

results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05

results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05

results$class <- gsub(" .+", "", results$name)
results$pro <- gsub(".+ ", "", results$name)
results$pro <- factor(results$pro, levels = c("Pro", "noPro"))
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))

results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)

results$deseq.sigup.pds <- results$deseq.sig.pds &
  results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc &
  results$deseq.lfc.pdc > 0
```

```{r}
write.table(results, glue("{result_folder}foldchange_results.txt"))
table(results$name)
```

```{r}
#results <- read.table("foldchange_results.txt")
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
```


```{r fig.width=3, fig.height=3}
p <- ggviolin(
  results,
  x = "class",
  y = "mean.mock",
  fill = "class",
  palette = mypal,
  add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Violin_CTCF_classes.pdf"), last_plot())
```

```{r fig.width=3, fig.height=3}
p <- ggviolin(
  results,
  x = "class",
  y = "mean.mock",
  fill = "pro",
  palette = mypal,
  add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Violin_CTCF_classes_pro.pdf"),
       last_plot())
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(
  results,
  c(
    "class",
    "mock_1",
    "mock_2",
    "PDS_1",
    "PDS_2",
    "PhenDC3_1",
    "PhenDC3_2"
  )
))
ggboxplot(
  mdf,
  x = "variable",
  y = "value",
  fill = "class",
  palette = mypal
) + coord_cartesian(ylim = c(0, 10))
ggsave(glue("{plot_folder}Boxplot_CTCF_reps.pdf"), last_plot())
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(
  results,
  c(
    "class",
    "mock_1",
    "mock_2",
    "PDS_1",
    "PDS_2",
    "PhenDC3_1",
    "PhenDC3_2"
  )
))

mdf_stats_class = compare_means(value ~ class, group.by = "variable", data = mdf)

ggviolin(
  mdf,
  x = "variable",
  y = "value",
  fill = "class",
  palette = mypal,
  add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
ggsave(glue("{plot_folder}Violin_CTCF_reps.pdf"), last_plot())
```
```{r fig.width=3, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results, c(
  "class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
p <- ggviolin(
  mdf,
  x = "variable",
  y = "value",
  fill = "class",
  palette = mypal,
  add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
                                                                        c(-3, 3)) + stat_compare_means(aes(group = class), label.y = 3, size = 2)

annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
ggsave(
  glue("{plot_folder}Violin_CTCF_lfc.pdf"),
  last_plot(),
  width = 5,
  height = 5
)
```
```{r fig.width=3, fig.height=3}
p <- ggboxplot(
  mdf,
  x = "variable",
  y = "value",
  fill = "class",
  palette = mypal
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
                                                                        c(-2, 2))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Boxplot_CTCF_lfc.pdf"), last_plot())
```

```{r fig.width=5, fig.height=6}
mdf <- reshape2::melt(dplyr::select(results, c(
  "pro", "class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
mdf$pro <- factor(mdf$pro, levels = c("Pro", "noPro"))
mdf$x <- as.factor(paste0(mdf$class, " ", mdf$variable))
mdf$x <- factor(mdf$x, levels = levels(mdf$x)[c(2, 1, 4, 3)])
p <- ggviolin(
  mdf,
  x = "x",
  y = "value",
  fill = "class",
  palette = mypal,
  add = "median_iqr",
  facet.by = "pro"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
                                                                        c(-2, 2)) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Violin_CTCF_lfc_pro.pdf"), last_plot())
```

```{r}
med <- aggregate(deseq.lfc.pds ~ class,
                 data = results,
                 FUN = "median",
                 na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
```

```{r}
med <- aggregate(deseq.lfc.pds ~ name,
                 data = results,
                 FUN = "median",
                 na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
```
```{r}
med <- aggregate(deseq.lfc.pds ~ class,
                 data = results,
                 FUN = "median",
                 na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
```
```{r}
med <- aggregate(deseq.lfc.pdc ~ name,
                 data = results,
                 FUN = "median",
                 na.rm = T)
med$deseq.fc.pdc <- 2 ^ med$deseq.lfc.pdc
med
```

```{r}
deseq_lfc_stats = compare_means(deseq.lfc.pds ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
          glue("{stat_output}pds-deseq2_lfc_statistics.tsv"))

```

```{r}
deseq_lfc_stats = compare_means(deseq.lfc.pdc ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
          glue("{stat_output}phendc-deseq2_lfc_statistics.tsv"))
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(
  results,
  c(
    "class",
    "raw.lfc.pds_1",
    "raw.lfc.pds_2",
    "raw.lfc.pdc_1",
    "raw.lfc.pdc_2"
  )
))
ggviolin(
  mdf,
  x = "variable",
  y = "value",
  fill = "class",
  palette = mypal,
  add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
                                                                        c(-3, 3)) +
  stat_compare_means(aes(group = class), label.y = 3.6, size = 2)
ggsave(glue("{plot_folder}Violin_CTCF_lfc_individual_reps.pdf"), last_plot())
```

```{r fig.width=3, fig.height=3}
ggdensity(
  results,
  x = "raw.lfc.pds",
  color = "class",
  fill = "class",
  palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
```

```{r fig.width=3, fig.height=3}
ggdensity(
  results,
  x = "raw.lfc.pdc",
  color = "class",
  fill = "class",
  palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
```


```{r fig.width=3, fig.height=3}
ggscatter(
  results,
  x = "log2.mock",
  y = "log2.pds",
  size = 1,shape=20,
  alpha = 0.1,
  color = "deseq.sig.pds",
  palette = mypal[c(5, 2)]
)
ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PDSvsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
```
```{r fig.width=4, fig.height=4}
ggscatter(
  results,
  x = "log2.mock",
  y = "log2.pdc",
  size = 1,shape=20,
  alpha = 0.1,
  color = "deseq.sig.pdc",
  palette = mypal[c(5, 2)]
)
ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PhenDC3vsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
```
```{r}
tb <- table(results$class)
deseq.stats <- as.data.frame(t(as.matrix(tb)))
deseq.stats[1,] <- colSums(deseq.stats)
rownames(deseq.stats) <- c("Total")
deseq.stats.percent <- deseq.stats
deseq.stats.percent[1,] <- c(100,100)
tb
```

```{r}
tb <- table(results$deseq.sig.pds &
        (results$deseq.lfc.pds > 0),
      results$class)
deseq.stats <- rbind(deseq.stats, PDS.sig.up = as.data.frame.matrix(tb)[2,])
tb
```
```{r}
tb <- prop.table(table(results$deseq.sig.pds &
        (results$deseq.lfc.pds > 0),
      results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PDS.sig.up  = as.data.frame.matrix(tb)[2,])
tb
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pds &
        (results$deseq.lfc.pds > 0),
      results$class)
mdf <- reshape2::melt(table(
  results$deseq.sig.pds & (results$deseq.lfc.pds > 0),
  results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
```

```{r}
vl <- list(
  sig = grep("TRUE", results$deseq.sigup.pds),
  CTCF_G4 = grep("and", results$class),
  CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
```

```{r}
tb <- table(results$deseq.sig.pdc &
        (results$deseq.lfc.pdc > 0),
      results$class)
deseq.stats <- rbind(deseq.stats, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
```
```{r}
table(results$deseq.sig.pdc &
        (results$deseq.lfc.pdc < 0),
      results$class)
```


```{r}
tb <- prop.table(table(results$deseq.sig.pdc &
        (results$deseq.lfc.pdc > 0),
      results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
```


```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pdc &
        (results$deseq.lfc.pdc > 0),
      results$class)
mdf <- reshape2::melt(table(
  results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),
  results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
```


```{r}
results$uid <- seq(1:nrow(results))
vl <- list(
  sig = grep("TRUE", results$deseq.sigup.pdc),
  CTCF_G4 = grep("and", results$class),
  CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
```
 
```{r fig.width=3, fig.height=2.5}
table(results$deseq.sigup.pds, results$deseq.sigup.pdc)
mdf <- reshape2::melt(table(results$deseq.sigup.pds, results$deseq.sigup.pdc))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
```
```{r}
tb <- table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
      results$class)
deseq.stats <- rbind(deseq.stats, both.sig.up = as.data.frame.matrix(tb)[2,])
tb
```

```{r}
tb <- prop.table(table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
      results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, both = as.data.frame.matrix(tb)[2,])
tb
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class)
mdf <- reshape2::melt(table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
```

```{r fig.width=2.5, fig.height=2.5}
deseq.stats$class <- rownames(deseq.stats) 
mdf <- reshape2::melt(deseq.stats)
ggplot(mdf, aes(variable, class, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
```
```{r fig.width=2.5, fig.height=2.5}
deseq.stats.percent$class <- rownames(deseq.stats.percent) 
mdf <- reshape2::melt(deseq.stats.percent[-1,])
ggplot(mdf, aes(variable, class, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = round(mdf$value,2))) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup_percent.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
```

```{r}
vl <- list(
  sig_PDS = grep("TRUE", results$deseq.sigup.pds),
  sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
  CTCF_G4 = grep("and", results$class),
  CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
```

```{r}
results$uid <- seq(1:nrow(results))
vl <- list(
  sig_PDS = grep("TRUE", results$deseq.sigup.pds),
  sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
  CTCF_G4 = grep("and", results$class)
)
plot(euler(vl), quantities = T)
```
```{r fig.width=6, fig.height=6}
results$sig.by.class.pds <- paste0(results$deseq.sigup.pds, "_", results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds &
                results$class == "CTCF_and_G4"] <- 1

ggscatter(
  results,
  x = "log2.mock",
  y = "log2.pds",
  size = 0.5,
  alpha = results$psize,
  color = "sig.by.class.pds",
  palette = c(mypal[5], mypal[5], mypal[5], mypal[2], mypal[1])
)
```
```{r fig.width=6, fig.height=6}
ggscatter(
  results[!grepl("NA", results$sig.by.class.pds), ],
  x = "log2.mock",
  y = "deseq.lfc.pds",
  size = 0.2,
  alpha = "psize",
  color = "sig.by.class.pds",
  palette = c("#505050", "#505050", "red2", "blue")
)
```

```{r fig.width=5, fig.height=5}
ggscatterhist(
  results[!grepl("NA", results$sig.by.class.pds), ],
  x = "log2.mock",
  y = "log2.pds",
  size = 0.4,
  alpha = "psize",
  color = "sig.by.class.pds",
  margin.params = list(
    fill = "sig.by.class.pds",
    color = "black",
    size = 0.2
  ),
  palette = c("#505050", "#505050", "red2", "blue")
)
```

```{r fig.width=3, fig.height=3}
ggscatter(
  results,
  x = "raw.lfc.pds",
  y = "log2.G4",
  size = 0.2,
  alpha = 0.1,
  color = "deseq.sig.pds",
  cor.coef = T,
  palette = c("#888888",mypal[2])
)
ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDS.pdf"),
       plot = rasterize(last_plot()))
```
```{r fig.width=3, fig.height=3}
ggscatter(
  results,
  x = "raw.lfc.pdc",
  y = "log2.G4",
  size = 0.2,
  alpha = 0.1,
  color = "deseq.sig.pdc",
  cor.coef = T,
  palette = c("#888888",mypal[2])
)
ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDC.pdf"),
       plot = rasterize(last_plot()))
```

```{r fig.width=3, fig.height=3}
results$G4.quantile <- dplyr::ntile(results$G4_NT, n = 5)
results$pds.quantile <- dplyr::ntile(results$mean.pds-results$mean.mock, n = 4)
results$pdc.quantile <- dplyr::ntile(results$mean.pdc-results$mean.mock, n = 4)
```

```{r fig.width=6, fig.height=3}
top25.pds <- makeGRangesFromDataFrame(results[results$pds.quantile==4,1:3],na.rm=T)
bot75.pds <- makeGRangesFromDataFrame(results[results$pds.quantile!=4,1:3],na.rm=T)
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4  &  results$class == "CTCF_not_G4"),1:3],na.rm=T)
```

```{r}
top25.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile==4,1:3],na.rm=T)
bot75.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile!=4,1:3],na.rm=T)
top25.pdc.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pdc.CTCF_only <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4  &  results$class == "CTCF_not_G4"),1:3],na.rm=T)

```


```{r}
vl <- list(
  CTCF_G4 = top25.pds.CTCF_G4,
  CTCF_only = top25.pds.CTCF_only,
  G4motif = rtracklayer::import("../data/peaks/G4H_mm10_1.75.bed")
)
bedscout::plot_euler(grlist = vl,names = c("CTCF_G4","CTCF_only","G4motif"))
```


```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "pds.quantile",
  y = "deseq.lfc.pds",
  fill = mypal[3],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-1.5, 1.5)) + geom_hline(yintercept = 0, linetype =
                                                    "dotted")
ggsave(glue("{plot_folder}Violin_lfcPDS_by_PDSquantile.pdf"),
       plot = last_plot())
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "pdc.quantile",
  y = "deseq.lfc.pdc",
  fill = mypal[3],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-1.5, 1.5)) + geom_hline(yintercept = 0, linetype =
                                                    "dotted")
ggsave(glue("{plot_folder}Violin_lfcPDS_by_PhenDC3quantile.pdf"),
       plot = last_plot())
```


```{r fig.width=4, fig.height=3}
tb <- table(results$pds.quantile, results$class)
tb
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))
ggsave(glue("{plot_folder}Barplot_peakCat_by_PDSquantile.pdf"),
       plot = last_plot())
```
```{r fig.width=4, fig.height=3}
G4.motifs <- rtracklayer::import('../data/peaks/G4H_mm10_1.75.bed')
G4.motifs$type = rep("G4",length(G4.motifs))
ol <-  bedscout::annotate_overlapping_features( makeGRangesFromDataFrame(results[,1:3],na.rm=T),feat_gr = G4.motifs,name_field = "type")$nearby_features
ol[is.na(ol)] <- "none"

tb <- table(results$pds.quantile,Var1=ol)
              
tb

ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))
ggsave(glue("{plot_folder}Barplot_G4motif_by_PDSquantile.pdf"),
       plot = last_plot())
```

```{r}
table(ol,results$pds.quantile,  results$class)
```
```{r fig.width=4, fig.height=3}
tb <- table(results$pdc.quantile, results$class)
tb
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))
ggsave(glue("{plot_folder}Barplot_peakCat_by_PhenDC3quantile.pdf"),
       plot = last_plot())
```


```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = top25.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = bot75.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile.pdf"),
       plot = last_plot())
```
```{r fig.width=6, fig.height=3}
top25.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile==4,1:3],na.rm=T)
bot75.pdc <- makeGRangesFromDataFrame(results[results$pdc.quantile!=4,1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = top25.pdc, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = bot75.pdc, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PhenDC3quantile.pdf"),
       plot = last_plot())
```

```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[1:3],loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[1:3],loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile_byG4overlap.pdf"),
       plot = last_plot())
```

```{r fig.width=6, fig.height=3}
top25.pdc.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pdc.CTCF_only <- makeGRangesFromDataFrame(results[(results$pdc.quantile==4  &  results$class == "CTCF_not_G4"),1:3],na.rm=T)

p1 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PhenDC3quantile_byG4overlap.pdf"),
       plot = last_plot())
```

```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[c(2,3)],bg_bwfiles =  c(geo_bigwigs[1],geo_bigwigs[1]), loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[c(2,3)],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[c(2,3)],bg_bwfiles = c(geo_bigwigs[1],geo_bigwigs[1]), loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[c(2,3)],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PDSquantile_byG4.pdf"),
       plot = last_plot())
```
```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PhenDC3quantile_byG4.pdf"),
       plot = last_plot())
```

```{r fig.width=8, fig.height=3}
#geo_CnR_bigwigs <- list.files("../mendeley/additional/","_G4.bw",full.names = T)
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = bot75.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atCTCFpeaks_percentiles.pdf"),
       plot = last_plot())
```
```{r fig.width=8, fig.height=3}
closG4 <- bedscout::annotate_nearby_features(G4.motifs,feat_gr = makeGRangesFromDataFrame(results[,c(1,2,3,8)],keep.extra.columns = T),name_field = "name",distance_cutoff = 1000)
closG4 <- closG4[!is.na(closG4$nearby_features),]
p3 <- plot_bw_profile(bwfiles = c('../mendeley/additional/G4.mm10.plus.bw','../mendeley/additional/G4.mm10.minus.bw'), loci = closG4, mode = "center",verbose = F, colors = c("#FFAAFF","#DDBBFF"),show_error = T)
p4 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = closG4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p5 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = closG4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p3,p4,p5,ncol=3)
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = bot75.pds, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_PhenDC3_G4atCTCFpeaks_PDSpercentiles.pdf"),
       plot = last_plot())
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atCTCFpeaks_byG4.pdf"),
       plot = last_plot())
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = bot75.pds, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
```
```{r fig.width=8, fig.height=3}
p4 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[1:2], loci = "../data/peaks/G4H_mm10_1.75.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p5 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/G4H_mm10_1.75.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p4,p4,p5, ncol=3)
ggsave(glue("{plot_folder}Profile_G4atG4motifs.pdf"),
       plot = last_plot())
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[2], bg_bwfiles = geo_CnR_bigwigs[1], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[3:4], loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal,show_error = T) + coord_cartesian(ylim=c(0,15))
ggarrange(p1,p2,p3, ncol=3)
```

```{r fig.width=8, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p2 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
p3 <- plot_bw_profile(bwfiles = geo_CnR_bigwigs[4], bg_bwfiles = geo_CnR_bigwigs[3], norm_mode = "log2fc", loci = "../data/peaks/Control_peaks_shuffled.bed", mode = "center",verbose = F, colors = mypal[2],show_error = T) + coord_cartesian(ylim=c(-2,2))
ggarrange(p1,p2,p3, ncol=3)
```

```{r fig.width=6, fig.height=3}
df1 <-  data.frame(set="top25_CTCF_G4", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=top25.pds.CTCF_G4, labels=c("Mock","PDS")))[,6:7])
df2 <- data.frame(set="top25_CTCF_only", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=top25.pds.CTCF_only, labels=c("Mock","PDS")))[,6:7])
df3 <- data.frame(set="bottom75", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2],loci=bot75.pds, labels=c("Mock","PDS")))[,6:7])
df4 <- data.frame(set="Random", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[1:2], loci="../data/peaks/Control_peaks_shuffled.bed", labels=c("Mock","PDS")))[,6:7])
df <- rbind(df1,df2,df3,df4)
df$lfc.pds <- log2(df$PDS/df$Mock)
df$diff.pds <- df$PDS - df$Mock
mdf <- melt(df)
mdf$lg2 <- log2(mdf$value)
ggviolin(mdf[mdf$variable %in% c("Mock","PDS"),], "set","lg2",fill="variable",palette = mypal, add="median_iqr")
```
```{r fig.width=4, fig.height=3}
ggviolin(mdf[mdf$variable %in% "lfc.pds",], "set","value",fill="variable",palette = mypal, add="median_iqr") + geom_hline(yintercept = 0)
```

```{r fig.width=4, fig.height=3}
ggviolin(mdf[mdf$variable %in% "diff.pds",], "set","value",fill="variable",palette = mypal, add="median_iqr") + coord_cartesian(ylim = c(-20,20)) + geom_hline(yintercept = 0)
```

```{r fig.width=6, fig.height=3}
df1 <-  data.frame(set="top25_CTCF_G4", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=top25.pdc.CTCF_G4, labels=c("Mock","PDC")))[,6:7])
df2 <- data.frame(set="top25_CTCF_only", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=top25.pdc.CTCF_only, labels=c("Mock","PDC")))[,6:7])
df3 <- data.frame(set="bottom75", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4],loci=bot75.pdc, labels=c("Mock","PDC")))[,6:7])
df4 <- data.frame(set="Random", as.data.frame(bw_loci(bwfiles = geo_CnR_bigwigs[3:4], loci="../data/peaks/Control_peaks_shuffled.bed", labels=c("Mock","PDC")))[,6:7])
df <- rbind(df1,df2,df3,df4)
df$lfc.pdc <- log2(df$PDC/df$Mock)
df$diff.pdc <- df$PDC - df$Mock
mdf <- melt(df)
mdf$lg2 <- log2(mdf$value)
ggviolin(mdf[mdf$variable %in% c("Mock","PDC"),], "set","lg2",fill="variable",palette = mypal, add="median_iqr")
```


```{r fig.width=4, fig.height=3}
ggviolin(mdf[mdf$variable %in% "lfc.pdc",], "set","value",fill="variable",palette = mypal, add="median_iqr") + geom_hline(yintercept = 0)
```

```{r fig.width=4, fig.height=3}
ggviolin(mdf[mdf$variable %in% "diff.pdc",], "set","value",fill="variable",palette = mypal, add="median_iqr") + coord_cartesian(ylim = c(-20,20)) + geom_hline(yintercept = 0)
```

```{r fig.width=6, fig.height=3}
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4  &  results$class == "CTCF_not_G4"),1:3],na.rm=T)

p1 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```

```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pdc.CTCF_G4, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pdc.CTCF_only, mode = "center",verbose = F, colors = mypal[3]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```

```{r PDS lower quantile, fig.height=3, fig.width=3}
ord.top25 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_top25_PDSquantile.pdf"),
       plot = last_plot())
```
```{r PhenDC top 25, fig.height=3, fig.width=3}

ord.top25 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[3],loci = top25.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[3],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_top25_PhenDC3_on_PDS_quantile.pdf"),
       plot = last_plot())

```

```{r PDS bottom 75, fig.height=4, fig.width=3}
ord.bot75 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_bot75_PDSquantile.pdf"),
       plot = last_plot())
```
```{r PhenDC3 bottom 75, fig.height=4, fig.width=3}
ord.bot75 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[3],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_bot75_PhenDC3_on_PDS_quantile.pdf"),
       plot = last_plot())

```

```{r fig.width=3, fig.height=3}
G4_CnR <- bw_loci(
  c(
    "../mendeley/FigureS2/G4_CUT-Run_Mock-PDS_CPM_Wulfridge.bw",
    "../mendeley/FigureS2/G4_CUT-Run_PDS_CPM_Wulfridge.bw"
  ),
  loci = makeGRangesFromDataFrame(results, na.rm = T),
  labels = c("Mock", "PDS")
)
results$G4_CnR_delta <- G4_CnR$PDS - G4_CnR$Mock
results$G4_CnR_lfc <- log2(G4_CnR$PDS / G4_CnR$Mock)
results$G4_CnR_mock <- G4_CnR$Mock
```

```{r fig.width=3, fig.height=2}
ggviolin(
  results,
  x = "pds.quantile",
  y = "G4_CnR_delta",
  fill = mypal[2],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-10, 15)) + geom_hline(yintercept = 1.55, linetype =
                                                    "dotted")
ggsave(glue("{plot_folder}Violin_G4_CnR_delta_by_PDSquantile.pdf"),
       plot = last_plot())
```
```{r fig.width=3, fig.height=2}
ggviolin(
  results,
  x = "pds.quantile",
  y = "G4_CnR_lfc",
  fill = mypal[2],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-10, 15)) + geom_hline(yintercept = 0, linetype =
                                                    "dotted")
ggsave(glue("{plot_folder}Violin_G4_CnR_delta_by_PDSquantile.pdf"),
       plot = last_plot())
```

```{r}
compare_means(G4_CnR_lfc ~ pds.quantile, results)
```
```{r}
results$pds.quartile.top25 <- results$pds.quantile == 4
mean(na.omit(results$G4_CnR_delta))
mean(results$G4_CnR_delta[results$pds.quartile.top25])
mean(results$G4_CnR_delta[!results$pds.quartile.top25])
compare_means(G4_CnR_delta ~ pds.quartile.top25, results)
```


```{r}
export.bed(top25.pds,"../data/peaks/CTCF_peaks_top25_pds_up.bed")
export.bed(bot75.pds,"../data/peaks/CTCF_peaks_bot75_pds_up.bed")
```

```{r}
top25.pds <- rtracklayer::import("../data/peaks/CTCF_peaks_top25_pds_up.bed")
bot75.pds <- rtracklayer::import("../data/peaks/CTCF_peaks_bot75_pds_up.bed")
```

### Analysis by G4 motif

```{r fig.width=6, fig.height=3}
rx.G4 <- rtracklayer::import("../data/peaks/mm10_canonical_G4_PQS-regex.bed")  
tr.G4 <- rtracklayer::import("../data/peaks/G4H_mm10_1.75.bed")  
G4_bed <- rtracklayer::import('../data/peaks/G4_WT_peaks.narrowPeak')
bedscout::plot_euler(list(rx.G4,tr.G4),names=c("Regex","G4Hunter"))
ggsave(glue("{plot_folder}Venn_Regex_G4Hunter.pdf"),last_plot())
```
```{r fig.width=6, fig.height=3}
bedscout::plot_euler(list(rx.G4,tr.G4,G4_bed),names=c("Regex","G4Hunter","G4"))
ggsave(glue("{plot_folder}Venn_Regex_G4Hunter_G4CnT.pdf"),
       plot = last_plot())
```

```{r}
tr.G4$name <- "G4"
top25.pds.G4anno <- bedscout::annotate_nearby_features(top25.pds,tr.G4,name_field = "name", distance_cutoff = 1000)
top25.pds.G4_1kb <- top25.pds.G4anno[!is.na(top25.pds.G4anno$nearby_features),]              
top25.pds.noG4 <- top25.pds.G4anno[is.na(top25.pds.G4anno$nearby_features),]               
                                
p1 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pds.G4_1kb, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pds.noG4, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```
```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[1:2],loci = top25.pds.G4_1kb, mode = "center",verbose = F, colors = mypal[c(1,2)]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[1:2],loci = top25.pds.noG4, mode = "center",verbose = F, colors = mypal[c(1,2)]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile_byG4overlap.pdf"),
       plot = last_plot())
```

```{r fig.width=6, fig.height=3} 
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[2],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.G4_1kb, mode = "center",verbose = F, colors = mypal[2],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[2],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.noG4, mode = "center",verbose = F, colors = mypal[2],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PDSquantile_byG4motif1kb.pdf"),
       plot = last_plot())
```

```{r fig.width=6, fig.height=3}
top25.pdc.G4anno <- bedscout::annotate_nearby_features(top25.pdc,tr.G4,name_field = "name", distance_cutoff = 1000)
top25.pdc.G4_1kb <- top25.pds.G4anno[!is.na(top25.pds.G4anno$nearby_features),]              
top25.pdc.noG4 <- top25.pds.G4anno[is.na(top25.pds.G4anno$nearby_features),]               
                                
p1 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pdc.G4_1kb, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pdc.noG4, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```

```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.G4_1kb, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pdc.noG4, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PhenDC3quantile_byG4motif1kb.pdf"),
       plot = last_plot())
```


```{r fig.width=6, fig.height=3}
tr.G4 <- rtracklayer::import("../data/peaks/G4H_mm10_1.75.bed")  
tr.G4$name <- "G4"
top25.pdc.G4anno <- bedscout::annotate_nearby_features(top25.pdc,tr.G4,name_field = "name", distance_cutoff = 1000)
top25.pdc.G4_1kb <- top25.pdc.G4anno[!is.na(top25.pdc.G4anno$nearby_features),]              
top25.pdc.noG4 <- top25.pdc.G4anno[is.na(top25.pdc.G4anno$nearby_features),]               
                                
p1 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pdc.G4_1kb, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs, loci = top25.pdc.noG4, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```
```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.G4_1kb, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[c(1,3)],loci = top25.pdc.noG4, mode = "center",verbose = F, colors = mypal[c(1,3)]) + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
```


```{r fig.width=6, fig.height=3}
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.G4_1kb, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[3],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.noG4, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
ggarrange(p1,p2, ncol=2)
```

### Analysis by G4 quantile
```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "G4.quantile",
  y = "deseq.lfc.pds",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linetype =
                                                    "dotted")
ggsave(glue("{plot_folder}Violin_CTCF_lfc_by_G4quantile.pdf"),
       plot = last_plot())
```



```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "G4.quantile",
  y = "log2.G4",
  fill = mypal2[5],
  add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted")
ggsave(glue("{plot_folder}Violin_G4_by_G4quantile.pdf"), plot = last_plot())
```

```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(
  G4_bigwigs,
  peak_cats,
  labels = c(
    peak_cats[[1]][1, ]$name,
    peak_cats[[2]][1, ]$name,
    peak_cats[[3]][1, ]$name,
    peak_cats[[4]][1, ]$name
  ),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal
)
```

```{r fig.width=3, fig.height=3}
plot_bw_profile(
  combined_bigwigs[1],
  peak_cats,
  labels = c(
    peak_cats[[1]][1, ]$name,
    peak_cats[[2]][1, ]$name,
    peak_cats[[3]][1, ]$name,
    peak_cats[[4]][1, ]$name
  ),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal
)
```




```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[1]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 5, 6)],
  upstream = 1500,
  downstream = 1500
)
p2 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[2]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 5, 6)],
  upstream = 1500,
  downstream = 1500
)
p3 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[3]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 5, 6)],
  upstream = 1500,
  downstream = 1500
)
p4 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[4]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 5, 6)],
  upstream = 1500,
  downstream = 1500
)

ggarrange(p1, p2, p3, p4, ncol = 4, nrow = 1)
```

```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[1]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 3, 4)],
  upstream = 1500,
  downstream = 1500
)
p2 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[2]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 3, 4)],
  upstream = 1500,
  downstream = 1500
)
p3 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[3]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 3, 4)],
  upstream = 1500,
  downstream = 1500
)
p4 <- plot_bw_profile(
  c(mocks_bigwigs, pds_bigwigs),
  peak_cats[[4]],
  labels = c("mock1", "mock2", "trt1", "rtr2"),
  mode = "center",
  show_error = T,
  verbose = F,
  remove_top = 0.001,
  colors = mypal2[c(9, 10, 3, 4)],
  upstream = 1500,
  downstream = 1500
)

ggarrange(p1, p2, p3, p4, ncol = 4, nrow = 1)
```
### cen we learn something from the genes with top-most increase in CTCF?
```{r}
sigup.pds <- results[results$deseq.sigup.pds, ]
sigup.pdc <- results[results$deseq.sigup.pdc, ]

#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
```


```{r}
gghistogram(results, "width", add = "median", fill = "class")
```

### Distance-based analysis

```{r}
G4_bed <- import('../data/peaks/G4_WT_peaks.narrowPeak')
ATAC_bed <- import('../data/peaks/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/peaks/regions/promoters_geneSymbol.mm10.bed')


G4_bed <- bedscout::annotate_overlapping_features(G4_bed, pro_bed, name_field = "name")

G4_bed$name <- "G4"
G4_bed$name[!is.na(G4_bed$nearby_features)] <- "G4pro"
G4_bed$name[grepl("pro", G4_bed$name)] <- "G4pro"
```

```{r}
nearest_G4_1kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4_bed,
  distance_cutoff = 1000,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4_bed,
  distance_cutoff = 2500,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_5kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4_bed,
  distance_cutoff = 5000,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_10kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4_bed,
  distance_cutoff = 10000,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_50kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4_bed,
  distance_cutoff = 50000,
  ignore.strand = T,
  name_field = "name"
)

ctcf$nearest_G4 <- factor(">50kb",
                          levels = c("<1kb", "<2.5kb", "<5kb", "<10kb", "<50kb", ">50kb"))
ctcf$nearest_G4[!is.na(nearest_G4_50kb$nearby_features)] <- "<50kb"
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"

ctcf$nearest_G4_type <- "none"
ctcf$nearest_G4_type[!is.na(nearest_G4_50kb$nearby_features)] <- nearest_G4_50kb$nearby_features[!is.na(nearest_G4_50kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_10kb$nearby_features)] <- nearest_G4_10kb$nearby_features[!is.na(nearest_G4_10kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_5kb$nearby_features)] <- nearest_G4_5kb$nearby_features[!is.na(nearest_G4_5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_2.5kb$nearby_features)] <- nearest_G4_2.5kb$nearby_features[!is.na(nearest_G4_2.5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_1kb$nearby_features)] <- nearest_G4_1kb$nearby_features[!is.na(nearest_G4_1kb$nearby_features)]

ctcf$nearest_G4_type[grep("pro", ctcf$nearest_G4_type)] <- "G4pro"

results$nearest_G4 <- ctcf$nearest_G4
results$nearest_G4_type <- ctcf$nearest_G4_type
table(results$nearest_G4)
table(results$nearest_G4_type)
table(results$nearest_G4, results$nearest_G4_type)
```
```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "mean.mock",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10)) +
  stat_compare_means(label.y = 8,
                     label.x = 3,
                     size = 3) +
  geom_hline(yintercept = 0, linetype = "dotted")
ggsave(glue("{plot_folder}Violin_CTCF_signal_by_G4distance.pdf"),
       plot = last_plot())
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pds",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-4, 4)) + geom_hline(yintercept = 0, linewidth = 0.2) +
  geom_hline(
    yintercept = median(results$deseq.lfc.pds[results$nearest_G4 == "<1kb"]),
    linetype = "dotted",
    linewidth = 0.2
  ) +
  stat_compare_means(label.y = 3,
                     label.x = 2,
                     size = 3)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_G4distance.pdf"),
       plot = last_plot())
```
```{r}
nearest_G4_stats = compare_means(deseq.lfc.pds ~ nearest_G4, results)
nearest_G4_stats
write_tsv(nearest_G4_stats,
          glue("{stat_output}pds_G4distance_plots-statistics.tsv"))
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pdc",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "<1kb"]),
  linetype = "dotted",
  linewidth = 0.2
)
ggsave(glue("{plot_folder}Violin_CTCF_PhenDC3_lfc_by_G4distance.pdf"), plot = last_plot())
```

```{r}
nearest_G4_stats = compare_means(deseq.lfc.pdc ~ nearest_G4, results)
nearest_G4_stats
write_tsv(nearest_G4_stats, glue("{stat_output}phendc_G4distance_plots-statistics.tsv"))
```
```{r fig.width=5, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pds",
  fill = "nearest_G4_type",
  palette = mypal[c(1, 3, 5)],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 4)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pds[results$nearest_G4 == "<1kb"]),
  linetype = "dotted",
  linewidth = 0.2
) +
  stat_compare_means(aes(group = nearest_G4_type))
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_G4distance_pro.pdf"),
       plot = last_plot())
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4", ])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4", ]) %>%
  write_tsv(.,
            glue(
              "{stat_output}pds_G4distance_plots-nearest_G4-statistics.tsv"
            ))
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4pro", ])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4pro", ]) %>%
  write_tsv(.,
            glue(
              "{stat_output}pds_G4distance_plots-nearest_G4pro-statistics.tsv"
            ))
```
```{r fig.width=5, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pdc",
  fill = "nearest_G4_type",
  palette = mypal[c(1, 3, 5)],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "<1kb"]),
  linetype = "dotted",
  linewidth = 0.2
)
ggsave(glue(
  "{plot_folder}Violin_CTCF_PhenDC3_lfc_by_G4distance_pro.pdf"
),
plot = last_plot())
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4", ])
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4", ]) %>%
  write_tsv(.,
            glue(
              "{stat_output}phendc_G4distance_plots-nearest_G4-statistics.tsv"
            ))
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4pro", ])
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
                                                    "G4pro", ]) %>%
  write_tsv(.,
            glue(
              "{stat_output}phendc_G4distance_plots-nearest_G4pro-statistics.tsv"
            ))
```
### Distance-based analysis, this time with predicted G4 motifs, not experimental peaks

```{r}
#results <- read.table(glue("{result_folder}foldchange_results.txt"))
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
G4_bed <- import('../data/peaks/G4_WT_peaks.narrowPeak')
G4_bed$name <- "peak"
G4pred_bed <- import('../data/peaks/G4H_mm10_1.75.bed')
ATAC_bed <- import('../data/peaks/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/peaks/regions/promoters_geneSymbol.mm10.bed')

G4pred_bed <- bedscout::annotate_overlapping_features(G4pred_bed, G4_bed, name_field = "name") 

G4pred_bed$name <- "G4pred"
G4pred_bed$name[grepl("peak", G4pred_bed$nearby_features)] <- "G4exp"

table(G4pred_bed$name)
```

```{r}
nearest_G4_0kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4pred_bed,
  distance_cutoff = 0,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_1kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4pred_bed,
  distance_cutoff = 1000,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4pred_bed,
  distance_cutoff = 2500,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_5kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4pred_bed,
  distance_cutoff = 5000,
  ignore.strand = T,
  name_field = "name"
)
nearest_G4_10kb <- bedscout::annotate_nearby_features(
  ctcf,
  G4pred_bed,
  distance_cutoff = 10000,
  ignore.strand = T,
  name_field = "name"
)


ctcf$nearest_G4 <- factor(">10kb",
                          levels = c("0kb", "<1kb", "<2.5kb", "<5kb", "<10kb", ">10kb"))
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"
ctcf$nearest_G4[!is.na(nearest_G4_0kb$nearby_features)] <- "0kb"

results$nearest_G4 <- ctcf$nearest_G4
table(results$nearest_G4)
table(results$nearest_G4, results$pro)
```


```{r fig.width=5, fig.height=2}
mdf <- reshape2::melt(table(results$nearest_G4, results$pro))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
  geom_tile(show.legend = F) + geom_text(aes(label = value)) +
  scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
ggsave(glue("{plot_folder}Heatmap_CTCF_sites_by_predG4distance.pdf"),
       plot = last_plot())
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "mean.mock",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10)) + geom_hline(yintercept = 0, linetype =
                                                    "dotted") +
  stat_compare_means(
    aes(group = nearest_G4),
    label.y = 8,
    label.x = 2,
    size = 3
  )
ggsave(glue(
  "{plot_folder}Violin_CTCF_signal_by_predG4distance.pdf"),
  plot = last_plot())
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pds",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = median(results$deseq.lfc.pds[results$nearest_G4 == "0kb"]),
  linetype = "dotted",
  linewidth = 0.2
) +
  stat_compare_means(label.y = 1.3,
                     label.x = 3,
                     size = 2)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_predG4distance.pdf"),
       plot = last_plot())
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results)
```

```{r fig.width=3, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pdc",
  fill = mypal[1],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "0kb"]),
  linetype = "dotted",
  linewidth = 0.2
) +
  stat_compare_means(label.y = 1.3,
                     label.x = 3,
                     size = 2)
ggsave(glue(
  "{plot_folder}Violin_CTCF_PhenDC3_lfc_by_predG4distance.pdf"
),
plot = last_plot())
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results)

```
```{r fig.width=5, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pds",
  fill = "pro",
  palette = mypal[c(1, 3, 5)],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pds[results$nearest_G4 == "0kb"]),
  linetype = "dotted",
  linewidth = 0.2
) +
  stat_compare_means(aes(group = pro),
                     label.y = 1.3,
                     label.x = 3,
                     size = 1.5)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_predG4distance_pro.pdf"), plot = last_plot())
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$pro == "noPro", ])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$pro == "noPro", ]) %>%
write_tsv(., "pds_G4distance_plots-noPro-statistics.tsv")
  
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$pro == "Pro", ]) %>%
  write_tsv(.,
            glue("{stat_output}pds_G4distance_plots-Pro-statistics.tsv"))

compare_means(deseq.lfc.pds ~ pro, group.by = "nearest_G4", results) %>%
  write_tsv(.,
            glue(
              "{stat_output}pds_G4distance_plots-between_prom_types_stats.tsv"
            ))

compare_means(deseq.lfc.pds ~ nearest_G4, group.by = "pro", results) %>%
  write_tsv(.,
            glue(
              "{stat_output}pds_G4distance_plots-within_prom_types_stats.tsv"
            ))

```
```{r fig.width=5, fig.height=3}
ggviolin(
  results,
  x = "nearest_G4",
  y = "deseq.lfc.pdc",
  fill = "pro",
  palette = mypal[c(1, 3, 5)],
  add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
  yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "0kb"]),
  linetype = "dotted",
  linewidth = 0.2
) +
  stat_compare_means(aes(group = pro),
                     label.y = 1.3,
                     label.x = 3,
                     size = 1.5)
ggsave(glue(
  "{plot_folder}Violin_CTCF_PhenDC3_lfc_by_predG4distance_pro.pdf"
),
plot = last_plot())
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$pro == "noPro", ]) %>%
  write_tsv(.,
            glue(
              "{stat_output}phendc_G4distance_plots-noPro-statistics.tsv"
            ))

compare_means(deseq.lfc.pdc ~ pro, group.by = "nearest_G4", results) %>%
  write_tsv(.,
            glue(
              "{stat_output}phendc_G4distance_plots-between_prom_types_stats.tsv"
            ))

compare_means(deseq.lfc.pdc ~ nearest_G4, group.by = "pro", results) %>%
  write_tsv(.,
            glue(
              "{stat_output}phendc_G4distance_plots-within_prom_types_stats.tsv"
            ))


```
